---
title: "ivmte: Extrapolate to AT/NT (genlate) in Kern and Hainmueller (2009, PA)"
author: "Arthur Zeyang Yu"
date: "July 23, 2023"
output:
  html_document: default
  word_document: default
---

```{r setup, include=TRUE}
knitr::opts_chunk$set(echo = TRUE)

# clean memory
rm(list = ls())

# load packages
library(ivmte) # the version is 1.4.0
library(AER)
library(gurobi) # the solver we use is Gurobi
library(tidyverse)
library(ggplot2)
library(splines2)
library(readstata13)

# set directories
data_path <- "C:/Users/arthu/Dropbox/research/mte_at/codes/replication/data"
results_path <- "C:/Users/arthu/Dropbox/research/mte_at/codes/replication/results"

# read data
kh_2009pa <- read.dta13(paste0(data_path, "/kh_2009pa_cleaned.dta"))
```

# preparations
```{r}
p <- predict(lm(data = kh_2009pa, treatment ~ iv),
             newdata = data.frame(iv = c(0, 1)))
p0 <- p[1]
p1 <- p[2]

# arguments for always-takers
args_at <- list(data = kh_2009pa,
                ivlike = c(lenin ~ treatment + iv + treatment * iv),
                target = "genlate",
                genlate.lb = 0,
                genlate.ub = p0,
                m0 = ~ u + I(u^2),
                m1 = ~ u + I(u^2),
                propensity = treatment ~ iv,
                noisy = FALSE)

# arguments for never-takers
args_nt <- list(data = kh_2009pa,
                ivlike = c(lenin ~ treatment + iv + treatment * iv),
                target = "genlate",
                genlate.lb = p1,
                genlate.ub = 1,
                m0 = ~ u + I(u^2),
                m1 = ~ u + I(u^2),
                propensity = treatment ~ iv,
                noisy = FALSE)
```

# replicate Figure 1: Weights for Cross Moments
```{r}
# draw weights
p_z0 <- 1 - mean(kh_2009pa$iv)
p_z1 <- mean(kh_2009pa$iv)

# plot weights for m_0(u)
late_0 <- tibble(x = seq(p0, p1, length = 10), 
                 y = p1 - p0, quantity="LATE", size=1)
e_d1z1y_0 <- tibble(x = seq(0, 1, length = 10),
                    y = 0, quantity = "E[1{D = 1, Z = 1} Y]", size = 1)
e_d1z0y_0 <- tibble(x = seq(0, 1, length = 10),
                    y = 0, quantity = "E[1{D = 1, Z = 0} Y]", size = 2)
e_d0z1y_0 <- tibble(x = seq(p1, 1, length = 10),
                    y = p_z1, quantity = "E[1{D = 0, Z = 1} Y]", size = .63)
e_d0z0y_0 <- tibble(x = seq(p0, 1, length = 10),
                    y = p_z0, quantity = "E[1{D = 0, Z = 0} Y]", size = 1)

toplot <- rbind(late_0, e_d1z1y_0, e_d1z0y_0, e_d0z1y_0, e_d0z0y_0)

ggplot(toplot) + geom_line(aes(x, y, lty = quantity, 
                               color = as.factor(size)), 
                           size = 1) +
  labs(x = "Unobserved Heterogeneity: U", y = "Weights") + theme_classic() +
  scale_colour_grey(start = 0, end = .5) + geom_text(
    data = toplot %>% group_by(quantity) %>% summarise(
      x = min(x) + .15 + (min(size) - 1) * .7,
      y = min(y) + .03,
      size = min(size)
    ),
    aes(
      x = x,
      y = y,
      label = quantity,
      color = as.factor(size)
    )
  ) + theme(legend.position = "none")
ggsave(paste0(results_path, "/figure1_a.pdf"), width=5, height=3.5)

late_1 <- tibble(x = seq(p0, p1, length = 10), 
                 y = p1 - p0, quantity="LATE", size=1)
e_d1z1y_1 <- tibble(x = seq(0, p1, length = 10),
                    y = p_z1,quantity="E[1{D = 1, Z = 1} Y]", size=1)
e_d1z0y_1 <- tibble(x = seq(0, p0, length = 10),
                    y = p_z0, quantity="E[1{D = 1, Z = 0} Y]", size=1)
e_d0z1y_1 <- tibble(x = seq(0, 1, length = 10),
                    y = 0, quantity="E[1{D = 0, Z = 1} Y]", size=2)
e_d0z0y_1 <- tibble(x = seq(0, 1, length = 10),
                    y = 0, quantity="E[1{D = 0, Z = 0} Y]", size=1)

toplot2<-rbind(late_1,e_d1z1y_1,e_d1z0y_1,e_d0z1y_1, e_d0z0y_1)

ggplot(toplot2) + geom_line(aes(x, y, lty = quantity, 
                                color = as.factor(size)), 
                            size = 1) +
  labs(x = "Unobserved Heterogeneity: U", y = "Weights") + theme_classic() +
  scale_colour_grey(start = 0, end = .5) + 
  geom_text(
    data = toplot2 %>% group_by(quantity) %>% summarise(
      x = min(x) + .15 + (min(size) - 1) * .7,
      y = min(y) + .03,
      size = min(size)
    ),
    aes(
      x = x,
      y = y,
      label = quantity,
      color = as.factor(size)
    )
  )  + theme(legend.position = "none")
ggsave(paste0(results_path, "/figure1_b.pdf"), width=5, height=3.5)
```

# extrapolation with covariates without confidence intervals: mother_occ (replicate Figure 2: Extrapolate ATEs of Always-Takers and Never-Takers by Linear Programming)
```{r}
# IV-like estimands: four cross moments
args_at[["ivlike"]] = c(lenin ~ (treatment + iv + treatment * iv) * mother_occ)
args_at[["propensity"]] = treatment ~ mother_occ + iv + mother_occ * iv
args_nt[["ivlike"]] = c(lenin ~ (treatment + iv + treatment * iv) * mother_occ)
args_nt[["propensity"]] = treatment ~ mother_occ + iv + mother_occ * iv

# function: loop ivmte over different polynomial orders
ivmte_loop_poly <- function(at_nt, poly_order) {
  
  # matrix: saving results
  mat <- matrix(nrow = 3, ncol = length(poly_order)+1)
  mat[1, -1] <- poly_order
  mat[1, 1] <- "Polynomial Orders"
  mat[2, 1] <- "Lower Bound"
  mat[3, 1] <- "Upper Bound"

  # loop over different polynomial orders
  for (k in poly_order){
      
  # create polynomials for MTR
  j <- which(poly_order == k)
  i <- k
  x <- paste("I(u^", 1:i, ")", sep = "")
  m0_gen <- as.formula(paste(" ~ mother_occ + u * mother_occ +", 
                             paste(x, collapse = "+")))
      
    if (at_nt == "at") {
      
      # compute bounds and save results for AT
      args_at[["m0"]] <- m0_gen
      args_at[["m1"]] <- m0_gen
      results <- do.call(ivmte, args_at)
      mat[2, j+1] <- round(results$bounds[1], 3)
      mat[3, j+1] <- round(results$bounds[2], 3)

    } else {
      
      # compute bounds and save results for NT
      args_nt[["m0"]] <- m0_gen
      args_nt[["m1"]] <- m0_gen
      results <- do.call(ivmte, args_nt)
      mat[2, j+1] <- round(results$bounds[1], 3)
      mat[3, j+1] <- round(results$bounds[2], 3)

    }
    
  }
  
    # return results
    return(mat)
  
}

# extrapolate ATE of AT as genlate: polynomial orders loop from 1 to 8
set.seed(20230110)
at_bd <- ivmte_loop_poly(at_nt = "at", poly_order = seq(1, 8, by = 1))
nt_bd <- ivmte_loop_poly(at_nt = "nt", poly_order = seq(1, 8, by = 1))

# write up a data frame that produces figures
at_bd[, 1] <- c("poly", "lb", "ub")
nt_bd[, 1] <- c("poly", "lb", "ub")
poly_order <- seq(1, 8, by = 1)

at_gen_df <- as.data.frame(t(at_bd)[-1,], stringsAsFactors = FALSE)
colnames(at_gen_df) <- t(at_bd)[1,]
at_gen_df[, 1:3] <- sapply(at_gen_df[, 1:3], as.numeric)

# draw figure for point estimates
ggplot(at_gen_df, aes(x = poly)) +
  ylab("Bounds") +
  coord_cartesian(xlim = c(1, 8), ylim = c(-0.175, -0.05)) +
  scale_x_continuous("Polynomial Order",
                     labels = as.character(poly_order),
                     breaks = poly_order) +
  geom_line(aes(y = lb)) +
  geom_point(aes(y = lb)) +
  geom_line(aes(y = ub)) +
  geom_point(aes(y = ub)) +
  theme(axis.title = element_text(size = 25), 
        axis.text = element_text(size = 20))
ggsave(paste0(results_path, "/figure2_a.pdf"))

nt_gen_df <- as.data.frame(t(nt_bd)[-1,], stringsAsFactors = FALSE)
colnames(nt_gen_df) <- t(nt_bd)[1,]
nt_gen_df[, 1:3] <- sapply(nt_gen_df[, 1:3], as.numeric)

ggplot(nt_gen_df, aes(x = poly)) +
  ylab("Bounds") +
  coord_cartesian(xlim = c(1, 8), ylim = c(-1, 1)) +
  scale_x_continuous("Polynomial Order",
                     labels = as.character(poly_order),
                     breaks = poly_order) +
  geom_line(aes(y = lb)) +
  geom_point(aes(y = lb)) +
  geom_line(aes(y = ub)) +
  geom_point(aes(y = ub)) +
  theme(axis.title = element_text(size = 25), 
        axis.text = element_text(size = 20))
ggsave(paste0(results_path, "/figure2_b.pdf"))
```

# extrapolation with covariates: age_bin_5 (replicate Figure 5: Robustness Check: Extrapolate ATEs of Always-Takers and Never-Takers by Linear Programming)
```{r}
# IV-like estimands: four cross moments
args_at[["ivlike"]] = c(lenin ~ (treatment + iv + treatment * iv) * age_bin_5)
args_at[["propensity"]] = treatment ~ age_bin_5 + iv + age_bin_5 * iv
args_nt[["ivlike"]] = c(lenin ~ (treatment + iv + treatment * iv) * age_bin_5)
args_nt[["propensity"]] = treatment ~ age_bin_5 + iv + age_bin_5 * iv

# function: loop ivmte over different polynomial orders
ivmte_loop_poly <- function(at_nt, poly_order) {
  
  # matrix: saving results
  mat <- matrix(nrow = 3, ncol = length(poly_order)+1)
  mat[1, -1] <- poly_order
  mat[1, 1] <- "Polynomial Orders"
  mat[2, 1] <- "Lower Bound"
  mat[3, 1] <- "Upper Bound"
  
  # loop over different polynomial orders
  for (k in poly_order){
      
  # create polynomials for MTR
  j <- which(poly_order == k)
  i <- k
  x <- paste("I(u^", 1:i, ")", sep = "")
  m0_gen <- as.formula(paste(" ~ age_bin_5 + u * age_bin_5 +", 
                             paste(x, collapse = "+")))
  
    if (at_nt == "at") {
      
      # compute bounds and save results for AT
      args_at[["m0"]] <- m0_gen
      args_at[["m1"]] <- m0_gen
      results <- do.call(ivmte, args_at)
      mat[2, j+1] <- round(results$bounds[1], 3)
      mat[3, j+1] <- round(results$bounds[2], 3)
      
    } else {
      
      # compute bounds and save results for NT
      args_nt[["m0"]] <- m0_gen
      args_nt[["m1"]] <- m0_gen
      results <- do.call(ivmte, args_nt)
      mat[2, j+1] <- round(results$bounds[1], 3)
      mat[3, j+1] <- round(results$bounds[2], 3)
      
    }
    
  }
  
    # return results
    return(mat)
  
}

# extrapolate ATE of AT as genlate: polynomial orders loop from 1 to 8
at_bd <- ivmte_loop_poly(at_nt = "at", poly_order = seq(1, 8, by = 1))
nt_bd <- ivmte_loop_poly(at_nt = "nt", poly_order = seq(1, 8, by = 1))

# draw figure
at_bd[, 1] <- c("poly", "lb", "ub")
nt_bd[, 1] <- c("poly", "lb", "ub")
poly_order <- seq(1, 8, by = 1)

# generate data frame by transposing result matrix
at_gen_df <- as.data.frame(t(at_bd)[-1,], stringsAsFactors = FALSE)
colnames(at_gen_df) <- t(at_bd)[1,]
at_gen_df[, 1:3] <- sapply(at_gen_df[, 1:3], as.numeric)

# generate plot
ggplot(at_gen_df, aes(x = poly)) +
  ylab("Bounds") +
  coord_cartesian(xlim = c(1, 8), ylim = c(-0.3, 0.15)) +
  scale_x_continuous("Polynomial Order",
                     labels = as.character(poly_order),
                     breaks = poly_order) +
  geom_line(aes(y = lb)) +
  geom_point(aes(y = lb)) +
  geom_line(aes(y = ub)) +
  geom_point(aes(y = ub)) +
  theme(axis.title = element_text(size = 25), 
        axis.text = element_text(size = 20))
ggsave(paste0(results_path, "/figure5_a.pdf"))

nt_gen_df <- as.data.frame(t(nt_bd)[-1,], stringsAsFactors = FALSE)
colnames(nt_gen_df) <- t(nt_bd)[1,]
nt_gen_df[, 1:3] <- sapply(nt_gen_df[, 1:3], as.numeric)

ggplot(nt_gen_df, aes(x = poly)) +
  ylab("Bounds") +
  coord_cartesian(xlim = c(1, 8), ylim = c(-1, 1)) +
  scale_x_continuous("Polynomial Order",
                     labels = as.character(poly_order),
                     breaks = poly_order) +
  geom_line(aes(y = lb)) +
  geom_point(aes(y = lb)) +
  geom_line(aes(y = ub)) +
  geom_point(aes(y = ub)) +
  theme(axis.title = element_text(size = 25), 
        axis.text = element_text(size = 20))
ggsave(paste0(results_path, "/figure5_b.pdf"))
```

# extrapolation with covariates: age_bin_10 (replicate Figure 5: Robustness Check: Extrapolate ATEs of Always-Takers and Never-Takers by Linear Programming)
```{r}
# IV-like estimands: four cross moments
args_at[["ivlike"]] = c(lenin ~ (treatment + iv + treatment * iv) * age_bin_10)
args_at[["propensity"]] = treatment ~ age_bin_10 + iv + age_bin_10 * iv
args_nt[["ivlike"]] = c(lenin ~ (treatment + iv + treatment * iv) * age_bin_10)
args_nt[["propensity"]] = treatment ~ age_bin_10 + iv + age_bin_10 * iv

# function: loop ivmte over different polynomial orders
ivmte_loop_poly <- function(at_nt, poly_order) {
  
  # matrix: saving results
  mat <- matrix(nrow = 3, ncol = length(poly_order)+1)
  mat[1, -1] <- poly_order
  mat[1, 1] <- "Polynomial Orders"
  mat[2, 1] <- "Lower Bound"
  mat[3, 1] <- "Upper Bound"
  
  # loop over different polynomial orders
  for (k in poly_order){
      
  # create polynomials for MTR
  j <- which(poly_order == k)
  i <- k
  x <- paste("I(u^", 1:i, ")", sep = "")
  m0_gen <- as.formula(paste(" ~ age_bin_10 + u * age_bin_10 +", 
                             paste(x, collapse = "+")))
      
    if (at_nt == "at") {
      
      # compute bounds and save results for AT
      args_at[["m0"]] <- m0_gen
      args_at[["m1"]] <- m0_gen
      results <- do.call(ivmte, args_at)
      mat[2, j+1] <- round(results$bounds[1], 3)
      mat[3, j+1] <- round(results$bounds[2], 3)
      
    } else {
      
      # compute bounds and save results for NT
      args_nt[["m0"]] <- m0_gen
      args_nt[["m1"]] <- m0_gen
      results <- do.call(ivmte, args_nt)
      mat[2, j+1] <- round(results$bounds[1], 3)
      mat[3, j+1] <- round(results$bounds[2], 3)
      
    }
    
  }
  
    # return results
    return(mat)
  
}

# extrapolate ATE of AT as genlate: polynomial orders loop from 1 to 8
at_bd <- ivmte_loop_poly(at_nt = "at", poly_order = seq(1, 8, by = 1))
nt_bd <- ivmte_loop_poly(at_nt = "nt", poly_order = seq(1, 8, by = 1))

# draw figure
at_bd[, 1] <- c("poly", "lb", "ub")
nt_bd[, 1] <- c("poly", "lb", "ub")
poly_order <- seq(1, 8, by = 1)

at_gen_df <- as.data.frame(t(at_bd)[-1,], stringsAsFactors = FALSE)
colnames(at_gen_df) <- t(at_bd)[1,]
at_gen_df[, 1:3] <- sapply(at_gen_df[, 1:3], as.numeric)

ggplot(at_gen_df, aes(x = poly)) +
  ylab("Bounds") +
  coord_cartesian(xlim = c(1, 8), ylim = c(-0.25, 0.15)) +
  scale_x_continuous("Polynomial Order",
                     labels = as.character(poly_order),
                     breaks = poly_order) +
  geom_line(aes(y = lb)) +
  geom_point(aes(y = lb)) +
  geom_line(aes(y = ub)) +
  geom_point(aes(y = ub)) +
  theme(axis.title = element_text(size = 25), 
        axis.text = element_text(size = 20))
ggsave(paste0(results_path, "/figure5_c.pdf"))

nt_gen_df <- as.data.frame(t(nt_bd)[-1,], stringsAsFactors = FALSE)
colnames(nt_gen_df) <- t(nt_bd)[1,]
nt_gen_df[, 1:3] <- sapply(nt_gen_df[, 1:3], as.numeric)

ggplot(nt_gen_df, aes(x = poly)) +
  ylab("Bounds") +
  coord_cartesian(xlim = c(1, 8), ylim = c(-1, 1)) +
  scale_x_continuous("Polynomial Order",
                     labels = as.character(poly_order),
                     breaks = poly_order) +
  geom_line(aes(y = lb)) +
  geom_point(aes(y = lb)) +
  geom_line(aes(y = ub)) +
  geom_point(aes(y = ub)) +
  theme(axis.title = element_text(size = 25), 
        axis.text = element_text(size = 20))
ggsave(paste0(results_path, "/figure5_d.pdf"))
```
